// Question Generation via Overgenerating Transformations and Ranking
// Copyright (c) 2008, 2009 Carnegie Mellon University.  All Rights Reserved.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
//
// For more information, bug reports, fixes, contact:
//    Michael Heilman
//	  Carnegie Mellon University
//	  mheilman@cmu.edu
//	  http://www.cs.cmu.edu/~mheilman



package edu.cmu.ark;

import java.io.*;
//import java.text.NumberFormat;
import java.util.*;

import com.googlecode.mrsqg.evaluation.Instance;
import com.googlecode.mrsqg.evaluation.QGSTEC2010;

//import weka.classifiers.functions.LinearRegression;

//import edu.cmu.ark.ranking.WekaLinearRegressionRanker;
import edu.stanford.nlp.trees.Tree;


/**
 * Wrapper class for outputting a (ranked) list of questions given an entire document,
 * not just a sentence.  It wraps the three stages discussed in the technical report and calls each in turn
 * (along with parsing and other preprocessing) to produce questions.
 *
 * This is the typical class to use for running the system via the command line.
 *
 * Example usage:
 *
    java -server -Xmx800m -cp lib/weka-3-6.jar:lib/stanford-parser-2008-10-26.jar:bin:lib/jwnl.jar:lib/commons-logging.jar:lib/commons-lang-2.4.jar:lib/supersense-tagger.jar:lib/stanford-ner-2008-05-07.jar:lib/arkref.jar \
	edu/cmu/ark/QuestionAsker \
	--verbose --simplify --group \
	--model models/linear-regression-ranker-06-24-2010.ser.gz \
	--prefer-wh --max-length 30 --downweight-pro
 *
 * Options recognized by {@link #main(String[])} (any other argument is silently ignored):
 * <ul>
 * <li><code>--debug</code>: turns on debug output through GlobalProperties.</li>
 * <li><code>--verbose</code>: also prints source sentence, answer phrase and score for each question.</li>
 * <li><code>--model PATH</code>: loads a ranking model; without it questions are not scored or sorted.</li>
 * <li><code>--properties PATH</code>: loads a properties file through GlobalProperties.</li>
 * <li><code>--keep-pro</code>: does not ask the question transducer to avoid pronouns and demonstratives.</li>
 * <li><code>--downweight-pro</code>: like <code>--keep-pro</code>, and asks the ranker to downweight pronouns.</li>
 * <li><code>--downweight-frequent-answers</code>, <code>--prefer-wh</code>: passed to the ranker's score adjustment.</li>
 * <li><code>--just-wh</code>: outputs only questions whose "whQuestion" feature equals 1.0.</li>
 * <li><code>--full-npc</code>, <code>--no-npc</code>: toggle the NPC settings of the initial transformation step.</li>
 * <li><code>--max-length N</code>: skips questions with more than N leaves (default 1000).</li>
 * <li><code>--test-in FILE</code>, <code>--test-out FILE</code>: QGSTEC2010 batch mode; both must be given.</li>
 * <li><code>--is-test</code>: tells the QGSTEC2010 reader that the input is not a development set.</li>
 * </ul>
 *
 * @author mheilman@cs.cmu.edu
 *
 */
public class QuestionAsker {

	/** Regular expression matching a line that contains at least one non-whitespace character. */
	private static final String NON_BLANK_LINE = "^.*\\S.*$";


	public QuestionAsker(){
	}


	/**
	 * Command-line entry point.
	 *
	 * When both <code>--test-in</code> and <code>--test-out</code> are given, every instance of the
	 * QGSTEC2010 input file is processed and the results are written as XML to the output file.
	 * Otherwise text is read from standard input and the generated questions are printed to
	 * standard output, one per line.
	 *
	 * @param args command-line options, see the class description
	 * @throws IllegalArgumentException if an option that needs a value is the last argument,
	 *         or if the value of <code>--max-length</code> is not an integer
	 */
	public static void main(String[] args) {
		QuestionTransducer qt = new QuestionTransducer();
		InitialTransformationStep trans = new InitialTransformationStep();
		QuestionRanker qr = null;

		qt.setAvoidPronounsAndDemonstratives(false);

		//pre-load
		AnalysisUtilities.getInstance();

		boolean printVerbose = false;
		String modelPath = null;
		boolean preferWH = false;
		boolean doNonPronounNPC = false;
		boolean doPronounNPC = true;
		int maxLength = 1000;
		boolean downweightPronouns = false;
		boolean avoidFreqWords = false;
		boolean dropPro = true;
		boolean justWH = false;
		boolean isDev = true;
		String testInFile = null, testOutFile = null;

		for(int i=0;i<args.length;i++){
			String arg = args[i];
			if(arg.equals("--debug")){
				GlobalProperties.setDebug(true);
			}else if(arg.equals("--verbose")){
				printVerbose = true;
			}else if(arg.equals("--model")){ //ranking model path
				modelPath = requireOptionValue(args, i);
				i++;
			}else if(arg.equals("--keep-pro")){
				dropPro = false;
			}else if(arg.equals("--downweight-pro")){
				dropPro = false;
				downweightPronouns = true;
			}else if(arg.equals("--downweight-frequent-answers")){
				avoidFreqWords = true;
			}else if(arg.equals("--properties")){
				GlobalProperties.loadProperties(requireOptionValue(args, i));
				i++; //skip the path so it is not re-examined as an option
			}else if(arg.equals("--prefer-wh")){
				preferWH = true;
			}else if(arg.equals("--just-wh")){
				justWH = true;
			}else if(arg.equals("--full-npc")){
				doNonPronounNPC = true;
			}else if(arg.equals("--no-npc")){
				doPronounNPC = false;
			}else if(arg.equals("--max-length")){
				maxLength = Integer.parseInt(requireOptionValue(args, i));
				i++;
			}else if(arg.equals("--test-in")){
				testInFile = requireOptionValue(args, i);
				i++;
			}else if(arg.equals("--test-out")){
				testOutFile = requireOptionValue(args, i);
				i++;
			}else if(arg.equals("--is-test")){
				isDev = false;
			}
		}

		qt.setAvoidPronounsAndDemonstratives(dropPro);
		trans.setDoPronounNPC(doPronounNPC);
		trans.setDoNonPronounNPC(doNonPronounNPC);

		if(modelPath != null){
			System.err.println("Loading question ranking models from "+modelPath+"...");
			qr = new QuestionRanker();
			qr.loadModel(modelPath);
		}

		if((testInFile == null) != (testOutFile == null)){
			System.err.println("Warning: --test-in and --test-out must be given together; reading from standard input instead.");
		}

		if (testInFile != null && testOutFile != null) {

			QGSTEC2010 QGSTEC2010processor = new QGSTEC2010(testInFile, isDev);
			List<Instance> instanceList = QGSTEC2010processor.getInstanceList();

			FileOutputStream fop = null;
			try {
				fop = new FileOutputStream(testOutFile, false);
				for (Instance ins:instanceList) {
					System.err.println("Sentence: " + ins.getIdNum());

					//the instance text is handed to the parser as one unit, without sentence segmentation
					List<String> sentences = Collections.singletonList(ins.getText());
					List<Tree> inputTrees = parseSentences(sentences);

					List<Question> questions = generateQuestions(inputTrees, trans, qt, qr,
							avoidFreqWords, preferWH, downweightPronouns);

					// group questions by type
					Map<String, List<Question>> questionByType = groupByType(questions, maxLength, justWH);

					// assign generated questions back to the instance.
					// Only odd indices are visited and two questions are added per visit;
					// NOTE(review): presumably the type list holds each requested type twice -- confirm against Instance.
					for (int i=1; i<ins.getQuestionTypeList().size(); i+=2) {
						String questionType = ins.getQuestionTypeList().get(i);
						// retrieve questions according to questionType
						List<Question> quesList = questionByType.get(questionType);
						if (quesList != null && !quesList.isEmpty()) {
							ins.addGenQuestion(quesList.get(0).yield());
							//fall back to repeating the first question when only one is available
							ins.addGenQuestion(quesList.size()>1?quesList.get(1).yield():quesList.get(0).yield());
							ins.addToCandidatesList(quesList.toString());
						} else {
							ins.addGenQuestion("");
							ins.addGenQuestion("");
							ins.addToCandidatesList("");
						}
					}
					// append incrementally
					ins.toXML(fop);
					fop.flush();
				}

				// write it overall again
				QGSTEC2010processor.toXML(fop);
			} catch (Exception e) {
				e.printStackTrace();
			} finally {
				//close the output file even when processing failed part-way
				if (fop != null) {
					try {
						fop.close();
					} catch (IOException e) {
						e.printStackTrace();
					}
				}
			}
			System.err.println("running test done.");

		} else {

			try{
				BufferedReader br = new BufferedReader(new InputStreamReader(System.in));

				if(GlobalProperties.getDebug()) System.err.println("\nInput Text:");

				while(true){
					String doc = readDocument(br);
					if(doc == null || doc.length() == 0){
						break;
					}

					long startTime = System.currentTimeMillis();
					List<String> sentences = AnalysisUtilities.getSentences(doc);

					//iterate over each segmented sentence and parse it
					List<Tree> inputTrees = parseSentences(sentences);

					if(GlobalProperties.getDebug()) System.err.println("Seconds Elapsed Parsing:\t"+((System.currentTimeMillis()-startTime)/1000.0));

					List<Question> questions = generateQuestions(inputTrees, trans, qt, qr,
							avoidFreqWords, preferWH, downweightPronouns);

					//now print the questions
					for(Question question: questions){
						if(isFilteredOut(question, maxLength, justWH)){
							continue;
						}
						printQuestion(question, printVerbose);
					}

					if(GlobalProperties.getDebug()) System.err.println("Seconds Elapsed Total:\t"+((System.currentTimeMillis()-startTime)/1000.0));
					//prompt for another piece of input text
					if(GlobalProperties.getDebug()) System.err.println("\nInput Text:");
				}

			}catch(Exception e){
				e.printStackTrace();
			}
		}
	}


	/**
	 * Returns the value that follows the option at <code>optionIndex</code>.
	 *
	 * @param args the full argument array
	 * @param optionIndex index of the option that needs a value
	 * @return the argument right after the option
	 * @throws IllegalArgumentException if the option is the last argument
	 */
	private static String requireOptionValue(String[] args, int optionIndex){
		if(optionIndex + 1 >= args.length){
			throw new IllegalArgumentException("Missing value for option "+args[optionIndex]);
		}
		return args[optionIndex + 1];
	}


	/**
	 * Reads one chunk of input text: the first line (blocking), followed by every further
	 * line that is available without blocking.  Non-blank lines are joined with single spaces,
	 * blank follow-up lines become newline characters.
	 *
	 * @param br reader to take the text from
	 * @return the collected text, or null if the end of input was reached before any line was read
	 * @throws IOException if reading fails
	 */
	private static String readDocument(BufferedReader br) throws IOException{
		String line = br.readLine();
		if(line == null){
			return null;
		}

		StringBuilder doc = new StringBuilder(line);
		//separate the first line from the next one, as is done for all later lines;
		//a blank first line is left untouched so that an empty line still yields an empty document
		if(line.matches(NON_BLANK_LINE)){
			doc.append(' ');
		}

		while(br.ready()){
			line = br.readLine();
			if(line == null){
				break;
			}
			if(line.matches(NON_BLANK_LINE)){
				doc.append(line).append(' ');
			}else{
				doc.append('\n');
			}
		}
		return doc.toString();
	}


	/**
	 * Parses each sentence and collects the resulting trees in input order.
	 *
	 * @param sentences sentences to parse
	 * @return one parse tree per sentence
	 */
	private static List<Tree> parseSentences(List<String> sentences){
		List<Tree> inputTrees = new ArrayList<Tree>();
		for(String sentence: sentences){
			if(GlobalProperties.getDebug()) System.err.println("Question Asker: sentence: "+sentence);
			inputTrees.add(AnalysisUtilities.getInstance().parseSentence(sentence).parse);
		}
		return inputTrees;
	}


	/**
	 * Runs the three stages on the parsed input: initial transformations, question transduction
	 * (followed by duplicate removal) and, if a ranker is given, scoring and sorting.
	 *
	 * @param inputTrees parse trees of the input sentences
	 * @param trans stage 1 transformer
	 * @param qt stage 2 question transducer
	 * @param qr stage 3 ranker, or null to skip ranking
	 * @param avoidFreqWords passed to the ranker's score adjustment
	 * @param preferWH passed to the ranker's score adjustment
	 * @param downweightPronouns passed to the ranker's score adjustment
	 * @return the generated questions
	 */
	private static List<Question> generateQuestions(List<Tree> inputTrees, InitialTransformationStep trans,
			QuestionTransducer qt, QuestionRanker qr,
			boolean avoidFreqWords, boolean preferWH, boolean downweightPronouns){
		List<Question> questions = new ArrayList<Question>();

		//step 1 transformations
		List<Question> transformationOutput = trans.transform(inputTrees);

		//step 2 question transducer
		for(Question t: transformationOutput){
			if(GlobalProperties.getDebug()) System.err.println("Stage 2 Input: "+t.getIntermediateTree().yield().toString());
			qt.generateQuestionsFromParse(t);
			questions.addAll(qt.getQuestions());
		}

		//remove duplicates
		QuestionTransducer.removeDuplicateQuestions(questions);

		//step 3 ranking
		if(qr != null){
			qr.scoreGivenQuestions(questions);
			boolean doStemming = true;
			QuestionRanker.adjustScores(questions, inputTrees, avoidFreqWords, preferWH, downweightPronouns, doStemming);
			QuestionRanker.sortQuestions(questions, false);
		}
		return questions;
	}


	/**
	 * Tells whether a question must be left out of the output.
	 *
	 * @param question question to check
	 * @param maxLength maximum number of leaves the question tree may have
	 * @param justWH if true, questions whose "whQuestion" feature is not 1.0 are left out
	 * @return true if the question is too long or excluded by <code>justWH</code>
	 */
	private static boolean isFilteredOut(Question question, int maxLength, boolean justWH){
		if(question.getTree().getLeaves().size() > maxLength){
			return true;
		}
		return justWH && question.getFeatureValue("whQuestion") != 1.0;
	}


	/**
	 * Groups the questions that pass the output filter by their question type,
	 * keeping the order of the input list within each group.
	 *
	 * @param questions questions to group
	 * @param maxLength see {@link #isFilteredOut(Question, int, boolean)}
	 * @param justWH see {@link #isFilteredOut(Question, int, boolean)}
	 * @return map from question type to the questions of that type
	 */
	private static Map<String, List<Question>> groupByType(List<Question> questions, int maxLength, boolean justWH){
		Map<String, List<Question>> byType = new HashMap<String, List<Question>>();
		for(Question question: questions){
			if(isFilteredOut(question, maxLength, justWH)){
				continue;
			}
			String type = question.getQuestionType();
			List<Question> sameType = byType.get(type);
			if(sameType == null){
				sameType = new ArrayList<Question>();
				byType.put(type, sameType);
			}
			sameType.add(question);
		}
		return byType;
	}


	/**
	 * Prints one question as a line on standard output.  In verbose mode the line also holds,
	 * tab-separated, the cleaned-up source sentence, the answer phrase (empty if the question has none)
	 * and the score.
	 *
	 * @param question question to print
	 * @param printVerbose whether to add the extra columns
	 */
	private static void printQuestion(Question question, boolean printVerbose){
		System.out.print(question.yield());
		if(printVerbose){
			System.out.print("\t"+AnalysisUtilities.getCleanedUpYield(question.getSourceTree()));
			System.out.print("\t");
			Tree ansTree = question.getAnswerPhraseTree();
			if(ansTree != null){
				System.out.print(AnalysisUtilities.getCleanedUpYield(ansTree));
			}
			System.out.print("\t"+question.getScore());
		}
		System.out.println();
	}


	/**
	 * Prints the names of all question features to standard output, one name per line.
	 */
	public static void printFeatureNames(){
		List<String> featureNames = Question.getFeatureNames();
		for(int i=0;i<featureNames.size();i++){
			if(i>0){
				System.out.print("\n");
			}
			System.out.print(featureNames.get(i));
		}
		System.out.println();
	}

}
